pacman::p_load(dplyr, readr, tidyr, ggplot2, sf, tmap)
folder = "C:/models/silo/capetown/cape_town_fabilut/silo/"
jobStartDistribution = read_csv(paste(folder, "input/jobStartTimeDistributions.csv",sep =""))
## Parsed with column specification:
## cols(
## time = col_double(),
## Agri = col_double(),
## Mnft = col_double(),
## Util = col_double(),
## Cons = col_double(),
## Retl = col_double(),
## Trns = col_double(),
## Finc = col_double(),
## Rlst = col_double(),
## Admn = col_double(),
## Serv = col_double()
## )
ggplot(data = jobStartDistribution) +
geom_line(inherit.aes = F, aes(x = time/3600, y = Mnft/max(jobStartDistribution$Mnft)), color = "red", size = 1) +
geom_line(inherit.aes = F, aes(x = time/3600, y = Admn/max(jobStartDistribution$Admn)), color = "blue", size = 1) +
ylab("Rel. frequency (red = manufacturing, blue = services)")

jobDurationDistribution = read_csv(paste(folder, "input/jobDurationDistributions.csv",sep =""))
## Parsed with column specification:
## cols(
## time = col_double(),
## Agri = col_double(),
## Mnft = col_double(),
## Util = col_double(),
## Cons = col_double(),
## Retl = col_double(),
## Trns = col_double(),
## Finc = col_double(),
## Rlst = col_double(),
## Admn = col_double(),
## Serv = col_double()
## )
ggplot(data = jobDurationDistribution) +
geom_line(inherit.aes = F, aes(x = time/3600, y = Mnft/max(jobDurationDistribution$Mnft)), color = "red", size = 1) +
geom_line(inherit.aes = F, aes(x = time/3600, y = Admn/max(jobDurationDistribution$Admn)), color = "blue", size = 1) +
ylab("Rel. frequency (red = manufacturing, blue = services)")

#Job start and duration: the data is obtained from Mobilität in Deutshcland, based on trip diaries (trip to work).
tripFrequencyDistribution = read_csv(paste(folder, "input/hts_work_tripLengthFrequencyDistribution.csv",sep =""))
## Parsed with column specification:
## cols(
## TravelTime = col_double(),
## frequency = col_double(),
## utility = col_double()
## )
ggplot(data = tripFrequencyDistribution) +
geom_line(inherit.aes = F, aes(x = TravelTime, y = utility), color = "black", size = 1) +
geom_line(inherit.aes = F, aes(x = TravelTime, y = exp(-0.2 * TravelTime)), color = "orange", size = 1) +
ylab("Rel. frequency (red = manufacturing, blue = services)")

#The trip freq. distribution controls the penalty of longer commutes. In the current setting we have the black line, copied from Munich long time ago. The black
# line reproduces the observed travel time distribution. We found afterwards that the black line can lead to too long commute times in the successive years. If this
# is observed in SILO-Capetown, it could be a good idea to replace this file by an exponential function, such as the orange line = exp(-0.2 * time_minutes).
#This can be simply done by replacing the column utility of this file accordingly. Has to be checked in the changes in commute distance by year.I selected the -0.2
# value for Munich with a zero-growth scenario, assuming that commute distance has to remain relatively stable.
#Crime index unfortunately not assigned. All subplaces (region) have same value, equal to 0.5.
#School quality index unfortunately not assigned. All subplaces (region) have same value, equal to 0.9.
zones = read_csv(paste(folder, "input/zoneSystem.csv",sep =""))
## Parsed with column specification:
## cols(
## Zone = col_double(),
## Area = col_double(),
## JUR_NAME = col_character(),
## ID_county = col_double(),
## ID_city = col_double(),
## Region = col_double()
## )
development = read_csv(paste(folder, "input/development.csv", sep =""))
## Parsed with column specification:
## cols(
## Zone = col_double(),
## FORMAL = col_double(),
## SEMIDETACHED = col_double(),
## MULTIFAMILY = col_double(),
## BACKYARD_INFORMAL = col_double(),
## INFORMAL = col_double(),
## DevCapacity = col_double(),
## DevLandUse = col_double()
## )
#the last two columns define the development capacity. They are expressed in acres (column DevLandUse). Unless specified, the column DevCapacity is not used, and
# would define the capacity in dwellings.
#The previos colums (dummy variables per dd type) simply define wether it is allowed or not to develop a dwelling of the type. They are all "1".
shp = st_read(paste(folder, "input/zonesShapeFile/zones.shp",sep =""))
## Reading layer `zones' from data source `C:\models\silo\capetown\cape_town_fabilut\silo\input\zonesShapeFile\zones.shp' using driver `ESRI Shapefile'
## Simple feature collection with 2980 features and 32 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -62801.14 ymin: -3795547 xmax: 13553.27 ymax: -3701471
## projected CRS: _MI_0
shp = shp %>% left_join(development, by = c("ID_cell" = "Zone"))
map = tm_basemap(leaflet::providers$CartoDB)
map = map + tm_shape(shp) +
tm_polygons(col ="DevLandUse")
tmap_leaflet(map)
## Warning: The shape shp is invalid. See sf::st_is_valid
#The developable land is extracted from the field area_1 (a 1 % of this, as the word doc says).
# Unfortunately, this field seems to be a wrong computation of the area!!
shp$true_area = st_area(shp)
ggplot(shp, aes(x = area_1, y = DevLandUse)) + geom_point()

ggplot(shp, aes(x = as.numeric(true_area), y = DevLandUse)) + geom_point()

pp = read_csv(paste(folder, "microData/pp_2011.csv", sep = ""))
## Parsed with column specification:
## cols(
## id = col_double(),
## hhid = col_double(),
## age = col_double(),
## gender = col_double(),
## relationShip = col_character(),
## occupation = col_double(),
## driversLicense = col_logical(),
## workplace = col_double(),
## income = col_double(),
## race = col_character()
## )
hh = read_csv(paste(folder, "microData/hh_2011.csv", sep = ""))
## Parsed with column specification:
## cols(
## id = col_double(),
## dwelling = col_double(),
## hhSize = col_double(),
## autos = col_double()
## )
dd = read_csv(paste(folder, "microData/dd_2011.csv", sep = ""))
## Parsed with column specification:
## cols(
## id = col_double(),
## zone = col_double(),
## type = col_character(),
## hhID = col_double(),
## bedrooms = col_double(),
## quality = col_double(),
## monthlyCost = col_double(),
## yearBuilt = col_double(),
## coordX = col_double(),
## coordY = col_double()
## )
jj = read_csv(paste(folder, "microData/jj_2011.csv", sep = ""))
## Parsed with column specification:
## cols(
## id = col_double(),
## zone = col_double(),
## personId = col_double(),
## type = col_character(),
## coordX = col_double(),
## coordY = col_double(),
## startTime = col_double(),
## duration = col_double()
## )
pp %>% group_by(race) %>% summarize(count = n(), ave_income = mean(income))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 3
## race count ave_income
## <chr> <int> <dbl>
## 1 BLACK 1494327 329.
## 2 COLOURED 1783284 527.
## 3 OTHER 143124 1177.
## 4 WHITE 641948 2644.
ggplot(pp, aes(x = income, color = race)) + stat_ecdf() + scale_x_log10() + xlab("Log(income)")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 1671071 rows containing non-finite values (stat_ecdf).

#The income is defined differently by races.
pp = pp %>% left_join(hh, by = c("hhid" = "id"))
pp = pp %>% left_join(dd, by = c("dwelling" = "id"))
pp_by_zone_and_race = pp %>% group_by(race,zone) %>% summarize(count = n()) %>% pivot_wider(names_from = "race", values_from = "count", values_fill = 0)
## `summarise()` regrouping output by 'race' (override with `.groups` argument)
shp = st_read(paste(folder, "input/zonesShapeFile/zones.shp",sep =""))
## Reading layer `zones' from data source `C:\models\silo\capetown\cape_town_fabilut\silo\input\zonesShapeFile\zones.shp' using driver `ESRI Shapefile'
## Simple feature collection with 2980 features and 32 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -62801.14 ymin: -3795547 xmax: 13553.27 ymax: -3701471
## projected CRS: _MI_0
shp = shp %>% left_join(pp_by_zone_and_race, by = c("ID_cell" = "zone"))
map = tm_basemap(leaflet::providers$CartoDB)
map = map + tm_shape(shp, "black") +
tm_polygons(col ="BLACK", style = "quantile", convert2density = T)
map = map + tm_shape(shp, "white") +
tm_polygons(col ="WHITE", style = "quantile", convert2density = T)
tmap_leaflet(map)
## Warning: The shape black is invalid. See sf::st_is_valid
## Warning: The shape white is invalid. See sf::st_is_valid